# Figure_1.R

# Part of the replication archive for 
#
#   Bullock, John G., and Kelly Rader. 2021. "Response Options and the 
#   Measurement of Political Knowledge." Forthcoming in the British Journal 
#   of Political Science.

library(Bullock)    # for PDF_crop(), qw(), sourcing()
library(dplyr)      # for %>%, filter(), mutate(), select(), etc.
library(grid)
library(gridExtra)  # for grid.arrange()
library(here)       # for here::here()
library(Hmisc)      # for llist()
library(lattice)    # for xyplot()
library(readr)      # for parse_factor(), a better version of factor()
library(survey)     # for svyby(), svydesign(), svymean()
library(tibble)     # for tibble(), used to build dataToPlot

source(here::here("R/SSI_2017_coding.R"))
source(here::here("R/functions/KRR.R"))  # to estimate guessing-corrected correct-response rates


# CONFIGURE PANEL DISPLAY
if (interactive()) {
  SHOW_GUESSING_CORRECTED_PANEL <- TRUE  # make Figure 1 (FALSE) or A2 (TRUE)? 
} else {
  clArgs <- commandArgs(trailingOnly = TRUE)
  if (length(clArgs) <= 0) { 
    writeLines(paste0(
      "clArgs has length ", length(clArgs), 
      ".  This length is less than 1, so the script is stopping."))
    stop()
  }
  SHOW_GUESSING_CORRECTED_PANEL <- as.logical(clArgs[1]) 
}



# USE SURVEY WEIGHTS?
USE_WEIGHTS <- TRUE




# **************************************************************************
# PRELIMINARIES FOR PRINTING THE FIGURE ####
# **************************************************************************
dirOutput        <- here::here('float_output/')
filenameStem     <- ifelse(
  SHOW_GUESSING_CORRECTED_PANEL,
  'Figure_A2',
  'Figure_1')
PDFtitle         <- 'Percentage of Correct Responses by Length and Difficulty of Response-Option Set (SSI 2017)'
lineTypes        <- 'solid'  # c(32, 'solid')
PS_width         <- 12.0
PS_height        <- 12.0
postscriptBackground <- 'transparent'
panelWidth_twoPanelsRaw <- list(1.80, "inches")
panelWidth_twoPanelsGC  <- list(2.50, "inches")
panelWidth_onePanel     <- list(2.40, "inches")
panelHeight_GC          <- list(1.90, "inches")
panelHeight_short       <- list(1.20, "inches") 
panelHeight_tall        <- list(2.90, "inches")
panelHeight_top         <- panelHeight_short 
panelHeight_bottom      <- if (SHOW_GUESSING_CORRECTED_PANEL) panelHeight_short else panelHeight_tall
panelLayout                  <- c(1, 1)  # columns first, then rows
panelLayout_GC               <- c(2, 3) 
panelLayout_simpleConditions <- c(1, 2)  # for top row of Figure 1
panelNumberCol   <- 'gray37' 
xBetween         <- if (SHOW_GUESSING_CORRECTED_PANEL) 1.50 else 0  # horizontal distance between panels
yBetween         <- if (SHOW_GUESSING_CORRECTED_PANEL) 0.67 else 2 
plotLineAlpha    <- .67                                   # transparency (1 = opaque)
plotLineColor    <- rgb(0.67, 0.67, 0.67, plotLineAlpha)  # first three params are gray (lower=darker)
plotLineWidth    <- 1
panelBorderCol   <- 'black'
panelBorderWidth <- .3
baseCexSize      <- if (SHOW_GUESSING_CORRECTED_PANEL) 1 else 0.95
panelTitleSize   <-  .825 * baseCexSize 
xLabSize         <-  .725 * baseCexSize
yLabSize         <-  .725 * baseCexSize
axisTickSize     <- .4
xAxisLabels_largePanel <- c('three\neasy', 'five\neasy', 'three\ndifficult', 'five\ndifficult', paste0("open\U00AD", "\nended"))
xAxisLabels_largePanel[1:4] <- paste0(xAxisLabels_largePanel[1:4], '\nROs')
if (panelLayout[1] == 2) {
  # If creating a two-column figure, make narrower labels.
  xAxisLabels_largePanel <- gsub(', ',                '\n',       xAxisLabels_largePanel)
  xAxisLabels_largePanel <- gsub('response\noptions', 'ROs',      xAxisLabels_largePanel)
}
xAxis_largePanel <- list(
  draw   = TRUE,
  limits = c(1, 5),  # (.67, 5.33),
  labels = c(xAxisLabels_largePanel, ''), 
  at     = 1:length(xAxisLabels_largePanel),
  alternating = c(1, 1),
  tck    = c(axisTickSize, 0), 
  col    = panelBorderCol, 
  cex    = xLabSize,
  axs    = 'i')  # axs = 'i' means that there is no padding around the xlimits

xAxis_simpleConditions          <- xAxis_largePanel
xAxis_simpleConditions$labels   <- list(qw("three five"), qw("easy difficult")) %>%
  lapply(function (x) paste(x, "ROs"))
xAxis_simpleConditions$at       <- list(c(1, 2), c(3, 4)) 
xAxis_simpleConditions$limits   <- list(c(0.9,2.1), c(2.9,4.1))
xAxis_simpleConditions$relation <- "free"

xAxis_GC <- list(
  draw   = TRUE,
  limits = split(
    x = c(rep(c(0.9, 2.1), 4), rep(c(0.7, 5.3), 2)),  # vector of limits
    f = rep(1:6, each = 2)),                          # list elements
  labels = split(
    x = c(
      qw("three five three five easy difficult easy difficult"),
      rep(
        c("three\neasy", "five\neasy", "three\ndifficult", "five\ndifficult",
          paste0("open\U00AD", "\nended")),
        2)),
    f = c(rep(1:4, each=2), rep(5:6, each = 5))),
  at = split(
    x = c(rep(1:2, 4), rep(1:5, 2)),                 # vector of x locations
    f = c(rep(1:4, each = 2), rep(5:6, each = 5))),  # list elements
  cex = xLabSize*.8,
  tck = axisTickSize,
  axs = 'i',  # axs = 'i' means that there is no padding around the xlimits
  relation = 'free'
)
xAxis_GC$labels <- lapply(
  xAxis_GC$labels, 
  function (x) gsub('([eyt])$', '\\1\nROs', x, perl = TRUE))


yAxisLabels_GC <- seq(10, 70, by = 15) %>%
  paste0(., '%') %>%  

  # Minor ticks with no real labels get blank ('') labels  
  { strsplit(paste0(., collapse = '   '), ' ')[[1]] } %>% 

  # Add blank labels for minor tick marks below first label and above last label
  c('', ., '')   

yAxisLabels_GC_bottomRow <- seq(25, 70, by = 15) %>%
  paste0(., '%') %>%  
  { strsplit(paste0(., collapse = '   '), ' ')[[1]] } %>%  # add blank labels 
  c('', ., '')                                             # more blank labels

yAxis_GC <- list(
  draw   = FALSE,  # we'll add tick marks in panel function
  limits = c(
    rep(list(c(-.01, .8)), 4),
    rep(list(c( .15, .8)), 2)),
  labels = c(
    rep(list(yAxisLabels_GC), 4),
    rep(list(yAxisLabels_GC_bottomRow), 2)),
  at = c(
    rep(list(seq(.05, .75, by = .05)), 4),
    rep(list(seq(.20, .75, by = .05)), 2)),
  axs = 'i',  # axs = 'i' means that there is no padding around the xlimits
  relation = 'free'
)




# Y AXIS FOR TOP ROW (TWO PANELS SHOWING ALL FIVE CONDITIONS)
yAxisLabels <- seq(30, 70, by = 10) %>%
  paste0(., '%') %>%  

  # Minor ticks with no real labels get blank ('') labels  
  { strsplit(paste0(., collapse = '  '), ' ')[[1]] }  

yAxisTickLength <- rep(.75*axisTickSize, length(yAxisLabels))
yAxisTickLength[yAxisLabels != ''] <- 1.33*axisTickSize

yAxis_allConditions <- list(
  draw   =  FALSE,  # but panel function will draw info from yAxis_bottomRow
  alternating = c(1, 1),
  limits = c(.24, .76),
  labels = yAxisLabels, 
  at     = seq(.30, .70, by = .05),
  tck    = 0,     # better handled in the panel function for this figure
  col    = panelBorderCol,
  cex    = yLabSize,
  rot    = 0,
  axs    = 'i')


# Y AXES FOR NON-GUESSING-CORRECTED PANELS
yAxisNumROs_labels <- seq(20, 65, by = 15) %>%
  paste0(., '%') %>%  

  # Minor ticks with no real labels get blank ('') labels  
  { strsplit(paste0(., collapse = '   '), ' ')[[1]] } %>% 

  # Add blank labels for minor tick marks below first label and above last label
  c('', ., '')   
yAxisnumROs_tickLength <- rep(.75*axisTickSize, length(yAxisNumROs_labels))
yAxisnumROs_tickLength[yAxisNumROs_labels != ''] <- 1.33*axisTickSize
yAxis_numROs        <- yAxis_allConditions
yAxis_numROs$limits <- c(.1, .75)
yAxis_numROs$labels <- yAxisNumROs_labels
yAxis_numROs$at     <- seq(.15, .70, by = .05)
yAxis_numROs$tickLength <- yAxisnumROs_tickLength


yAxis_diff <- yAxis_numROs
yAxisDiff_labels <- seq(40, 70, by = 10) %>%
  paste0(., '%') %>%  

  # Minor ticks with no real labels get blank ('') labels  
  { strsplit(paste0(., collapse = '  '), ' ')[[1]] } 
yAxisDiff_tickLength <- rep(.75*axisTickSize, length(yAxisDiff_labels))
yAxisDiff_tickLength[yAxisDiff_labels != ''] <- 1.25*axisTickSize

yAxis_diff$limits <- c(.35, .75)
yAxis_diff$labels <- yAxisDiff_labels
yAxis_diff$at     <- seq(.4, .7, by = .05) 
yAxis_diff$tickLength <- yAxisDiff_tickLength

yAxisCombined <- yAxis_numROs
yAxisCombined$at       <- list(yAxis_numROs$at, yAxis_diff$at)
yAxisCombined$limits   <- list(yAxis_numROs$limits, yAxis_diff$limits)
yAxisCombined$labels   <- list(yAxis_numROs$labels, yAxis_diff$labels)
yAxisCombined$relation <- 'free'
yAxisCombined$tickLength <- list(yAxis_numROs$tickLength, yAxis_diff$tickLength)



# Y-AXIS LABEL FOR ENTIRE PANEL
yLabel <- if (SHOW_GUESSING_CORRECTED_PANEL) {
  NULL
} else {
  paste0('Correct\U00AD', 'Response Rates by Response Options')
}



# **************************************************************************
# CREATE THE DATA FRAME ####
# **************************************************************************
# We create two data frames: dataToPlot and dataToPlot_GC. Each data frame has
# four columns: "question" (name of question), "responseOptionSet" (3:easy,
# 5:hard, OE, 3, 5, etc.), "percentCorrect", and "panel". The "panel" column 
# is "raw" for every row in dataToPlot, "guessing-corrected" for every panel 
# in dataToPlot_GC.

# List of dummy variables that indicate whether someone answered correctly.
# The first six items are the ones for which we had five different response-
# option conditions.  
correctVars <- llist(  

  # Items for which we had five different response-option conditions.
  ChiefJustice_correct,
  HowManyJusticesCurrently_correct,
  HowManyJusticesUsually_correct,
  HowManyWomen_correct,
  SenMajLeader_correct,
  TermLength_correct,
  
  # The same six items as above. [2019 09 06]
  ChiefJustice_correct,
  HowManyJusticesCurrently_correct,
  HowManyJusticesUsually_correct,
  HowManyWomen_correct,
  SenMajLeader_correct,
  TermLength_correct,
  
  # Now considering the six items for which we varied only the number of 
  # response options.  [2019 09 06]
  ProcedureForChoosingJustices_correct,
  WatchLawyersArgue_correct,           
  FinalSay_correct,                   
  TieVoteProcedure_correct,           
  CourtPower_correct,                 
  JusticeRemoval_correct)  

correctVarsDF <- data.frame(correctVars) %>%  # need separate DF b/c we need the var. names
  filter(!is.na(originalData$weight)) 
  
correctVarsDO <- correctVarsDF %>%        # DO = "design object"
  { svydesign(
      ids     = ~1, 
      data    = ., 
      weights = if (USE_WEIGHTS) na.omit(originalData$weight) else NULL
    ) 
  }
  


# List of dummy variables indicating whether someone offered an incorrect
# answer -- not a "don't know," not a skip. We need this info for guessing 
# correction.
incorrectNotDK_vars <- list(  
  !ChiefJustice_correct             & !ChiefJustice_CE%in%c("Don't know", "Skipped"),
  !HowManyJusticesCurrently_correct & !HowManyJusticesCurrently_CE%in% c("Don't know", "Skipped"),
  !HowManyJusticesUsually_correct   & !HowManyJusticesUsually_CE%in%c("Don't know", "Skipped"),
  !HowManyWomen_correct             & !HowManyWomen_CE%in%c("Don't know", "Skipped"),
  !SenMajLeader_correct             & !SenMajLeader_CE%in%c("Don't know", "Skipped"),
  !TermLength_correct               & !TermLength_CE%in%c("Don't know", "Skipped"),
  
  # Just duplicating the six lines above.
  !ChiefJustice_correct             & !ChiefJustice_CE%in%c("Don't know", "Skipped"),
  !HowManyJusticesCurrently_correct & !HowManyJusticesCurrently_CE%in% c("Don't know", "Skipped"),
  !HowManyJusticesUsually_correct   & !HowManyJusticesUsually_CE%in%c("Don't know", "Skipped"),
  !HowManyWomen_correct             & !HowManyWomen_CE%in%c("Don't know", "Skipped"),
  !SenMajLeader_correct             & !SenMajLeader_CE%in%c("Don't know", "Skipped"),
  !TermLength_correct               & !TermLength_CE%in%c("Don't know", "Skipped"),

  # Now considering the six items for which we varied only the number of 
  # response options.  
  !ProcedureForChoosingJustices_correct & !ProcedureForChoosingJustices%in%c("-99", "Don't know"),
  !WatchLawyersArgue_correct            & !WatchLawyersArgue%in%c("-99", "Don't know"),
  !FinalSay_correct                     & !FinalSay%in%c("-99", "Don't know"),
  !TieVoteProcedure_correct             & !TieVoteProcedure%in%c("-99", "Don't know"),   
  !CourtPower_correct                   & !CourtPower%in%c("-99", "Don't know"),
  !JusticeRemoval_correct               & !JusticeRemoval%in%c("-99", "Don't know")) %>%

  setNames(names(correctVars))

incorrectNotDK_varsDF <- data.frame(incorrectNotDK_vars) %>%  # need separate DF b/c we need the var. names
  filter(!is.na(originalData$weight))  
incorrectNotDK_varsDO <- incorrectNotDK_varsDF %>%            # DO = "design object"
  { svydesign(
      ids     = ~1, 
      data    = ., 
      weights = if (USE_WEIGHTS) na.omit(originalData$weight) else NULL
    ) 
  }
      

# List of variables indicating the conditions to which subjects were assigned.
conditionVars <- llist(  
  
  # Factor variables for questions that had five conditions.
  ChiefJustice_condition,
  HowManyJusticesCurrently_condition,
  HowManyJusticesUsually_condition,
  HowManyWomen_condition,
  SenMajLeader_condition,
  TermLength_condition,
  
  # Numeric variables for the six questions listed above.
  ChiefJustice_numROs,
  HowManyJusticesCurrently_numROs,
  HowManyJusticesUsually_numROs,
  HowManyWomen_numROs,
  SenMajLeader_numROs,
  TermLength_numROs,

  # Numeric variables for questions that had only "short" and "long" conditions.
  JusticesChosen_numROs,          
  WatchLawyersArgue_numROs,       
  ConflictOverMeaning_numROs,     
  IfJusticesSplit_numROs,         
  CourtPowerDescription_numROs,   
  JusticeRemoval_numROs,     

  labels = FALSE) %>%

  
  # Convert numeric to factor -- necessary for creation of the "questions" 
  # variable below.
  lapply(., factor) 


conditionVarsDF <- data.frame(conditionVars) %>%
  filter(!is.na(originalData$weight))  
  
  

# LISTS INDICATING PERCENTAGES CORRECT AND INCORRECT FOR EACH QUESTION
# Consider "percentCorrect". It is a list with one element for each element 
# of correctVars. And each element is a named vector of the percentages 
# correct in each condition.  
getPercentages <- function (varFormula, myFac, designObject) {
  # Run tapply/svyby to extract means of variable specified in "varFormula" 
  # by the levels specfied in "myFac", a factor variable.  [2020 07 20]
  meanTable      <- svyby(varFormula, myFac, designObject, svymean)
  means          <- meanTable[, 3]
  conditionNames <- rownames(meanTable)
  names(means) <- conditionNames
  means
}

percentCorrect <- rep(list(NULL), length(correctVars))
for (i in seq_along(correctVars)) {
  myFormula <- formula(paste0("~", names(correctVarsDF)[i]))
  percentCorrect[[i]] <- getPercentages(myFormula, conditionVarsDF[[i]], correctVarsDO)
}

percentIncorrectNotDK <- rep(list(NULL), length(incorrectNotDK_vars))
for (i in seq_along(incorrectNotDK_vars)) {
  myFormula <- formula(paste0("~", names(incorrectNotDK_varsDF)[i]))
  percentIncorrectNotDK[[i]] <- getPercentages(myFormula, conditionVarsDF[[i]], incorrectNotDK_varsDO)
}



# LIST INDICATING GUESSING-CORRECTED PERCENTAGES CORRECT
# We need to calculate the number of response options ("numROs") not just  
# for the Chief Justice questions as a whole, but for the "3:easy" condition, 
# the "5:hard" condition, and so on -- so that we can get guessing-corrected 
# response rates in each of those conditions. The same holds true for all of 
# the other questions.  
numROs <- sapply(
  X        = percentCorrect, 
  FUN      = function (x) names(x) %>% 
    gsub(':.*', '', .) %>%  # e.g., convert "3:easy" to "3"
    na_if('OE') %>%         # if the condition label is "OE", code it as having NA response options
    as.integer(), 
  simplify = TRUE)
percentGC <- mapply(
  FUN = KRR, 
  percentCorrect, 
  percentIncorrectNotDK, 
  numROs)  


questionNames <- rep(names(conditionVars), sapply(conditionVars, nlevels)) %>% 
  gsub('_condition|_numROs', '', .)  # e.g., convert "ChiefJustice_condition" to "ChiefJustice" 
dataToPlot <- tibble(
  question          = questionNames,
  responseOptionSet = sapply(conditionVars, levels) %>% unlist,
  percentCorrect    = unlist(percentCorrect),
  panel             = 'raw')  # ChiefJustice, TermLength, etc.

if (SHOW_GUESSING_CORRECTED_PANEL) {
  dataToPlot_GC <- tibble(
    question          = dataToPlot$question,
    responseOptionSet = dataToPlot$responseOptionSet,
    percentCorrect    = unlist(percentGC),
    panel             = 'guessing-corrected')
  
  # There is no guessing-corrected response rate for people who were asked 
  # open-ended questions. For convenience, we just replace these NA values 
  # with the "raw" correct-response rates to the open-ended questions.
  dataToPlot_GC$percentCorrect[dataToPlot_GC$responseOptionSet == 'OE'] <- 
    dataToPlot$percentCorrect[dataToPlot$responseOptionSet == 'OE'] 
  
  dataToPlot <- bind_rows(dataToPlot, dataToPlot_GC)
}


# FILTER OUT THE "SUPER-EASY" CHIEF JUSTICE CONDITION
dataToPlot <- dataToPlot %>% filter(!grepl('superEasy', dataToPlot$responseOptionSet))



# ADD ROWS FOR "DIFFICULT" AND "EASY"  [2020 02 12]
# We want some panels for which the x axis values are just "easy" or 
# "difficult." We now add rows to dataToPlot that will create lines to plot
# in these panels. We do so just by taking unweighted averages: for example, 
# we compute an average "easy" percentage correct for the Chief Justice 
# question by averaging over the "3:easy" and "5:easy" percentages correct 
# for that question, without accounting for the different number of subjects
# in each condition.  
easyHardRows <- dataToPlot %>%
  filter(grepl('\\d:', responseOptionSet)) %>%
  mutate(
    responseOptionSet = dplyr::recode(
      responseOptionSet, 
      "3:easy" = "easy",
      "5:easy" = "easy",
      "3:hard" = "hard",
      "5:hard" = "hard")) %>%
  group_by(panel, question, responseOptionSet) %>%
  dplyr::summarize(percentCorrect = mean(percentCorrect))
  
dataToPlot <- bind_rows(dataToPlot, easyHardRows)


# CREATE ROWS FOR MEAN PERCENT CORRECT
# Create rows for mean percent correct, in each response condition (3:easy,
# 3:hard, 5:easy, 5:hard, 3, 5, and OE) across all relevant questions. 
dataToPlot <- dataToPlot %>%
  group_by(panel, responseOptionSet) %>%
  bind_rows(
    .,
    dplyr::summarize(., question = "mean", percentCorrect = mean(percentCorrect))) %>%
  ungroup() %>% 
  
    
  # ORDER THE COLUMNS  
  # Order response-option conditions so that they have the order that we want
  # for the x axes of our panels.
  mutate(responseOptionSet = parse_factor(
    x = responseOptionSet,
    levels = qw("3:easy 5:easy 3:hard 5:hard OE 3 5 easy hard"))) %>%  
  
  # Order questions for our convenience when viewing the dataToPlot data frame.
  mutate(question= parse_factor(
    x = question,
    levels = qw("ChiefJustice HowManyJusticesCurrently HowManyJusticesUsually 
                 HowManyWomen SenMajLeader TermLength 

                 ConflictOverMeaning CourtPowerDescription IfJusticesSplit
                 JusticeRemoval JusticesChosen WatchLawyersArgue

                 mean"))) %>%  

  # Order panels so that raw is on left, guessing-corrected on right.
  mutate(panel = parse_factor(panel, levels = qw("raw guessing-corrected")))




# CHANGE NAMES OF panel FACTOR FOR GUESSING-CORRECTED FIGURE
# The guessing-corrected figure has six panels. This code creates the 
# appropriate panel names: number_raw, difficulty_raw, numDiff_raw, number_GC,
# and so on.  
if (SHOW_GUESSING_CORRECTED_PANEL) {
  dataToPlot <- dataToPlot %>%
    mutate(panel = dplyr::recode(
        panel, "guessing-corrected" = "GC")) %>%
    mutate(panel = case_when(
      grepl('^\\d$',         responseOptionSet) ~ paste0('number_',     as.character(panel)),
      grepl('^(easy|hard)$', responseOptionSet) ~ paste0('difficulty_', as.character(panel)),
      grepl('^\\d:\\w',      responseOptionSet) ~ paste0('numDiff_',    as.character(panel)),
      grepl('^OE$',          responseOptionSet) ~ paste0('numDiff_',    as.character(panel)),
      TRUE ~ as.character(panel))) %>%
    mutate(panel = parse_factor(
      panel, 
      levels = qw(
        "number_raw number_GC difficulty_raw difficulty_GC 
         numDiff_raw numDiff_GC"))) %>%
 
     # ADD X VARIABLE FOR PLOTTING
     mutate(x = dplyr::recode(
       responseOptionSet,
       "3"="1", "5"="2", "easy"="1", "hard"="2", 
       "3:easy"="1", "5:easy"="2", "3:hard"="3", "5:hard"="4", "OE"="5"))
}


# CHANGE ORDER OF ROWS
# For convenience when examining the data frame 
dataToPlot <- arrange(dataToPlot, panel, question, responseOptionSet) 




# **************************************************************************
# CHECK THE DATA FRAME ####
# **************************************************************************
# Check to see that the averages across questions are correct. The two 
# commands in each group should produce the same number.
if (!sourcing()) {

  if (SHOW_GUESSING_CORRECTED_PANEL) {
    dataToPlot %>% 
      filter(panel == 'numDiff_GC', responseOptionSet == '5:easy', question == 'mean') 
    dataToPlot %>% 
      filter(panel == 'numDiff_GC', responseOptionSet == '5:easy', question != 'mean') %>%
      dplyr::summarize(x = mean(percentCorrect))
  }

  else {
    dataToPlot %>% 
      filter(panel == 'raw', responseOptionSet == '3:hard', question == 'mean') 
    dataToPlot %>% 
      filter(panel == 'raw', responseOptionSet == '3:hard', question != 'mean') %>%
      dplyr::summarize(x = mean(percentCorrect))
    
    dataToPlot %>% 
      filter(panel == 'raw', responseOptionSet == '5', question == 'mean') 
    dataToPlot %>% 
      filter(panel == 'raw', responseOptionSet == '5', question != 'mean') %>%
      dplyr::summarize(x = mean(percentCorrect))
  }  
}



# **************************************************************************
# PANEL FUNCTION: RIGHT-HAND PANEL OF FIGURE 1 ####
# **************************************************************************
panelFunc_rightHandPanel <- function (yAxis, firstPanelNum, ...) {
  
  # list(...)  # useful for debugging

  # CHECK ARGUMENTS
  # Variables explicitly passed to this function will be listed by ls().
  if (! "yAxis"%in%ls()) {  
    stop("The yAxis argument must be supplied.")
  }
  
  trellis.par.set("clip", list(panel="off", strip="off"))
  
  
  # SHADE THE RIGHTMOST PART OF EACH PANEL
  #  lrect(
  #    xleft   = 4,
  #    xright  = xAxis$limits[2],
  #    ybottom = yAxis$limits[1],
  #    ytop    = yAxis$limits[2],
  #    col     = 'gray67',
  #    border  = 'transparent')
  
  
  # TICK MARKS
  # Do this before adding panel number, so that a tick mark won't appear on
  # top of the panel-number box.
  panel.axis(
    side        = "top",
    at          = xAxis_largePanel$at,
    draw.labels = FALSE,
    half        = FALSE,
    outside     = FALSE,
    tck         = axisTickSize)

  panel.axis(
    side        = "left",
    at          = yAxis$at,
    draw.labels = FALSE,
    half        = FALSE,
    outside     = TRUE,
    tck         = yAxisTickLength)

  # Add outside tick marks on the right-hand side of the panel.
  panel.axis(
    side        = 'right',
    at          = yAxis$at,
    draw.labels = FALSE,
    half        = FALSE,
    outside     = TRUE,
    tck         = yAxisTickLength)    

  
  # PANEL NUMBERS IN UPPER RIGHT-HAND CORNER OF EACH PANEL
  # Do this before plotting the panels with panel.xyplot(), so that the 
  # panel borders will appear on top of the grey squares, and so that there 
  # won't be a small white gap between the squares and the borders.  
  # [2020 02 14]
  grid.polygon(
    x  = c(.900, .999, .999,  .900, .900), 
    y  = c(.900, .900, .999,  .999, .900), 
    gp = gpar(fill='gray87', col='gray87'))  
  grid.text(
    label = firstPanelNum + panel.number() - 1, 
    x     = .950, 
    y     = .950, 
    just  = 'center', 
    gp    = gpar(font=2, cex=1.15, col=panelNumberCol))

  
  # PLOT THE DATA
  panel.xyplot(...)  
  

  # Y-AXIS TICK LABELS ON THE LEFT AND THE RIGHT
  grid.text(
    label = yAxis$labels,
    x     = unit(-.09, "npc"),
    y     = unit(yAxis$at, "native"),
    rot   = 0,
    gp    = gpar(cex = yLabSize))
  grid.text(
    label = yAxis$labels,
    x     = unit(1.09, "npc"),
    y     = unit(yAxis$at, "native"),
    rot   = 0,
    gp    = gpar(cex = yLabSize))

  
  # LABEL ABOVE PANEL
  grid.text(
    label = paste0('Correct\U00AD', 'Response Rates by Number\nand Difficulty of Response Options'),
    x = unit(0.5, "npc"),
    y = unit(1.03, "npc"),
    vjust = 0,
    gp = gpar(cex = panelTitleSize, lineheight = 1)
  )
  
}



# **************************************************************************
# PANEL FUNCTION: LEFT-HAND PANELS ####
# **************************************************************************
panelFunc_simpleConditions <- function (yAxis, firstPanelNum, ...) {
  
  # list(...)  # useful for debugging

  # CHECK ARGUMENTS
  # Variables explicitly passed to this function will be listed by ls().
  if (! "yAxis"%in%ls()) {  
    stop("The yAxis argument must be supplied.")
  }
  
  trellis.par.set("clip", list(panel="off", strip="off"))
  
  
  # PLOT THE DATA
  panel.xyplot(...)  
  
  
  # LABELS ABOVE EACH PANEL
  yLabel_simplePanels <- c(
    paste0('Correct\U00AD', 'Response Rates by\nNumber of Response Options'),
    paste0('Correct\U00AD', 'Response Rates by\nDifficulty of Response Options')
  )
  grid.text(
    label = yLabel_simplePanels[panel.number()],
    x = unit(0.5, "npc"),
    y = unit(1.035, "npc"),
    vjust = 0,
    gp = gpar(cex = panelTitleSize, lineheight = 1.0))
  
  
  # Y-AXIS TICK LABELS ON LEFT AND RIGHT
  grid.text(
    label = yAxis$labels[[panel.number()]],
    x     = unit(-.09, "npc"),
    y     = unit(yAxis$at[[panel.number()]], "native"),
    rot   = 0,
    gp    = gpar(cex = yLabSize))
  grid.text(
    label = yAxis$labels[[panel.number()]],
    x     = unit(1.09, "npc"),
    y     = unit(yAxis$at[[panel.number()]], "native"),
    rot   = 0,
    gp    = gpar(cex = yLabSize))


  
  
  # TICK MARKS ON LEFT SIDES OF PANELS
  panel.axis(
    side        = 'left',
    at          = yAxis$at[[panel.number()]],
    draw.labels = FALSE,
    half        = FALSE,
    outside     = TRUE,
    tck         = yAxis$tickLength[[panel.number()]])

  
  # TICK MARKS ON RIGHT SIDES OF PANELS
  panel.axis(
    side        = 'right',
    at          = yAxis$at[[panel.number()]],
    draw.labels = FALSE,
    half        = FALSE,
    outside     = TRUE,
    tck         = yAxis$tickLength[[panel.number()]])
  
  
  # PANEL NUMBERS IN UPPER RIGHT-HAND CORNER OF EACH PANEL
  grid.polygon(
    x  = c(.900, .9975, .9975, .9000, .900), 
    y  = c(.825, .8250, .9975, .9975, .825), 
    gp = gpar(fill='gray87', col='gray87'))  
  grid.text(
    label = firstPanelNum + panel.number() - 1, 
    x     = .95, 
    y     = .9125, 
    just  = 'center', 
    gp    = gpar(font=2, cex=1.00, col=panelNumberCol))
}  



# **************************************************************************
# PANEL FUNCTION: GUESSING-CORRECTED FIGURE (FIGURE A2) ####
# **************************************************************************
panelFunc_GC <- function (...) {
  
  # list(...)  # useful for debugging

  trellis.par.set("clip", list(panel="off", strip="off"))
  
  
  # PLOT THE DATA
  panel.xyplot(...)  
  
  
  # Y-AXIS TEXT LABEL
  grid.text(
    label = paste0("Correct\U00AD", "Response Rates"),
    x   = if (panel.number()%%2 == 1) unit(-.2, "npc") else unit(1.2, "npc"),
    y   = unit(0.5, "npc"), 
    rot = if (panel.number()%%2 == 1) 90 else -90,
    gp  = gpar(cex = yLabSize))
  

  # Y-AXIS TICK MARKS AND TICK LABELS
  getTickLength <- function(panelNum) {
    axisLabels <- yAxis_GC$labels[[panelNum]]
    if_else(axisLabels == '', .75*axisTickSize, 1.25*axisTickSize)
  }
  panel.axis(
    side     = if_else(panel.number()%%2 == 1, 'left', 'right'),
    at       = yAxis_GC$at[[panel.number()]],
    labels   = yAxis_GC$labels[[panel.number()]],
    outside  = TRUE,
    half     = FALSE,
    text.cex = yLabSize * .8,
    tck      = getTickLength(panel.number()))
  
  # Tick marks on interior sides, which don't have y-axis labels  
  panel.axis(
    side    = if_else(panel.number()%%2 == 1, 'right', 'left'),
    at      = yAxis_GC$at[[panel.number()]],
    draw.labels = FALSE,
    outside = FALSE,
    half    = FALSE,
    tck     = getTickLength(panel.number()))
  
  
  # X-AXIS TICK MARKS ON TOPS OF PANELS
  if (panel.number()>=5) {
    panel.axis(
      side    = "top",
      at      = xAxis_GC$at[[panel.number()]],
      draw.labels = FALSE,
      outside = FALSE,
      half    = FALSE,
      tck     = axisTickSize)
  }

  # PANEL NUMBERS IN UPPER RIGHT-HAND CORNER OF EACH PANEL
  grid.polygon(
    x  = c(.900, .9975, .9975, .9000, .900), 
    y  = c(.900, .9000, .9975, .9975, .900), 
    gp = gpar(fill='gray87', col='gray87'))  
  grid.text(
    label = panel.number(), 
    x     = .95, 
    y     = .9485, 
    just  = 'center', 
    gp    = gpar(font=2, cex=1.00, col=panelNumberCol))
}  




# **************************************************************************
# DRAW THE COMPONENT FIGURES OF FIGURE 1 ####
# **************************************************************************
# To create Figure 1, we first create two figures: a top-row figure (with the 
# the top two panels) and a bottom-row figure (with the bottom two panels).   
# Then we combine these figures with grid.arrange().


# SET FIGURE PARAMETERS 
# For example, set the colors of the lines. Note that dataToPlot$question is   
# a factor with 13 levels, and "mean" is the 13th level. The superpose.line  
# setting will assign the 1st color to the lines corresponding to the first  
# level of the question vector, the 2nd color to the lines corresponding to  
# the second level of the question vector, and so on. The "mean" lines in  
# each panel will therefore get the 13th color. And this is so even if we  
# are just plotting data from six individual questions, as in the bottom-row   
# panel.  [2019 09 04]
numQuestions <- dataToPlot %>% { nlevels(.$question) } - 1
trellis.par.set(
  axis.components = list(
    left  = list(
      pad1 = 0.30,
      pad2 = 6.00),
    right = list(
      pad1 = 0.30,
      pad2 = 6.00)),            # distance between axis labels and big y-axis label
  axis.line = list(
    alpha = 1, 
    col   = panelBorderCol,     # to eliminate lattice panel border, set col = "white"
    lty   = 1, 
    lwd   = panelBorderWidth),
  clip = list(
    panel = "off", 
    strip = "off"),
  layout.widths = list(
    ylab = 10),
  par.ylab.text = list(
    cex  = yLabSize,
    font = 1),
  superpose.line = list(        # governs lines when the "groups" argument is used
    col   = c(rep(plotLineColor, numQuestions), 'black'), 
    lty   = lineTypes, 
    lwd   = c(rep(plotLineWidth, numQuestions), 1.5*plotLineWidth))
)


# MAKE THE FIRST TWO PANELS OF FIGURE 1
# This figure has two side-by-side panels. In the first panel, the x-axis 
# values are "3" and "5". In the second panel, they are "easy" and 
# "difficult."
percentagesCorrectPlot_simpleConditions <- xyplot(
  percentCorrect ~ responseOptionSet | panel,
  groups        = question,
  layout        = panelLayout_simpleConditions,
  as.table      = TRUE,
  between       = list(x = xBetween, y = 3),
  data          = dataToPlot %>% 
    filter(., grepl('^(\\d|easy|hard)$', responseOptionSet)) %>%
    mutate(panel = if_else(
      grepl('^\\d$', .$responseOptionSet),
      "number",
      "difficulty")) %>%
    mutate(panel = parse_factor(panel, levels = qw("number difficulty"))), 
  type          = 'l',  
  strip         = FALSE,
  panel         = panelFunc_simpleConditions,
  scales        = list(
    x = xAxis_simpleConditions,
    y = within(yAxisCombined, rm(tickLength))),
  # yAxis_numROs),
  xlab          = '',
  ylab          = '',
  par.settings  = trellis.par.set(clip = list(panel="off", strip="off")),
  xAxis         = xAxis_simpleConditions,
  yAxis         = yAxisCombined,    
  firstPanelNum = 1
)
print(
  percentagesCorrectPlot_simpleConditions,
  panel.width  = panelWidth_twoPanelsRaw,
  panel.height = panelHeight_short)
percentagesCorrectPlot_simpleConditions_grob <- grid.grab(wrap = TRUE)


# MAKE THE RIGHT-HAND PANEL OF FIGURE 1
percentagesCorrectPlot_allConditions <- xyplot(
  percentCorrect ~ responseOptionSet | panel,
  groups        = question,
  layout        = panelLayout,
  between       = list(x = xBetween, y = 0),
  data          = dataToPlot %>% filter(grepl('\\d:|OE', responseOptionSet)),  # filter out questions that have only two conditions 
  type          = 'l',  
  strip         = FALSE,
  panel         = panelFunc_rightHandPanel,
  scales        = list(x = xAxis_largePanel, y = yAxis_allConditions),
  xlab          = '',
  ylab          = '',  # yLabel,
  par.settings  = trellis.par.set("clip", list(panel="off", strip="off")),
  yAxis         = yAxis_allConditions,  # required by the panel function
  firstPanelNum = 3
)
print(
  percentagesCorrectPlot_allConditions,
  panel.width  = if (SHOW_GUESSING_CORRECTED_PANEL) panelWidth_twoPanelsGC else panelWidth_onePanel,
  panel.height = if (SHOW_GUESSING_CORRECTED_PANEL) panelHeight_short else panelHeight_tall)
percentagesCorrectPlot_allConditions_grob <- grid.grab(wrap = TRUE)



# MAKE FIGURE A1, WHICH SHOWS THE GUESSING-CORRECTED DATA  [2020 02 16]
# We plot all six panels at once.
if (SHOW_GUESSING_CORRECTED_PANEL) {
  percentagesCorrectPlot_GC <- xyplot(
    percentCorrect ~ x | panel,
    groups        = question,
    layout        = panelLayout_GC,
    as.table = TRUE,
    between       = list(x = xBetween, y = yBetween),
    data          = dataToPlot, 
    type          = 'l',  
    strip         = FALSE,
    panel         = panelFunc_GC,
    scales        = list(x = xAxis_GC, y = yAxis_GC),
    xlab          = '',
    ylab          = '',  # yLabel,
    par.settings  = trellis.par.set("clip", list(panel="off", strip="off"))
  )
  print(
    percentagesCorrectPlot_GC,
    panel.width  = panelWidth_twoPanelsGC,
    panel.height = panelHeight_GC)
  percentagesCorrectPlot_GC_grob <- grid.grab(wrap = TRUE)
}



# **************************************************************************
# PRINT THE FIGURE TO A PDF FILE
# **************************************************************************
pdf(
  file   = paste0(dirOutput, '/', filenameStem, '.pdf'), 
  width  = PS_width, 
  height = PS_height, 
  paper  = "special", 
  title  = PDFtitle,
  bg     = postscriptBackground)


if (SHOW_GUESSING_CORRECTED_PANEL) {
  grid.draw(percentagesCorrectPlot_GC_grob)
} else {

  # Knit the two rows together. The "rectTransparent" object is just a blank 
  # space that appears between the two rows.
  rectTransparent <- rectGrob(
    x  = .5, 
    y  = .5, 
    gp = gpar(col = 'transparent', fill = 'transparent'))
  grid.arrange(
    grobs = list(
      percentagesCorrectPlot_simpleConditions_grob,
      rectTransparent, 
      percentagesCorrectPlot_allConditions_grob 
    ),
    ncol = 3,
    widths = unit(
      x     = c(panelWidth_twoPanelsRaw[[1]] + .5, .975, panelWidth_onePanel[[1]] + .5), 
      units = 'inches')
  )
}

dev.off()


# CROP THE PDF FILE
PDF_crop(paste0(dirOutput, "/", filenameStem), open = interactive(), deleteOrig = TRUE)



# **************************************************************************
# AUXILIARY INFORMATION: EXTRACT MEANS THAT WE REPORT IN THE PAPER #### 
# **************************************************************************
if (!sourcing()) {

  # "Panel 1 shows that correct-response rates range from..."
  dataToPlot %>% filter(question == 'mean', panel == 'raw')
  
  # "Panel 2 shows that a correction for guessing leaves the basic picture 
  # unchanged...average correct-response rates range from..."
  dataToPlot %>% filter(question == 'mean', panel == 'guessing-corrected')
  
  # Appendix: "In the raw data, this difference of differences is 28% - 6% = 
  # 22 percentage points."  [2020 08 05]
  dataToPlot %>% filter(panel == "numDiff_raw", question == "mean") %>%
    summarize(
      diff1 =  percentCorrect[responseOptionSet == "3:easy"] - percentCorrect[responseOptionSet == "5:hard"], 
      diff2 =  percentCorrect[responseOptionSet == "5:hard"] - percentCorrect[responseOptionSet == "OE"],
      DD    = (percentCorrect[responseOptionSet == "3:easy"] - percentCorrect[responseOptionSet == "5:hard"]) -
                (percentCorrect[responseOptionSet == "5:hard"] - percentCorrect[responseOptionSet == "OE"]))
          
  # Appendix: "In the guessing-corrected data, it is 32% - 4% = 
  # 36 percentage points."  [2020 08 05]
  dataToPlot %>% filter(panel == "numDiff_GC", question == "mean") %>%
    summarize(
      diff1 =  percentCorrect[responseOptionSet == "3:easy"] - percentCorrect[responseOptionSet == "5:hard"], 
      diff2 =  percentCorrect[responseOptionSet == "5:hard"] - percentCorrect[responseOptionSet == "OE"],
      DD    = (percentCorrect[responseOptionSet == "3:easy"] - percentCorrect[responseOptionSet == "5:hard"]) -
                (percentCorrect[responseOptionSet == "5:hard"] - percentCorrect[responseOptionSet == "OE"]))        
                            
  # Appendix: guessing corrections helping to reveal mechanisms
  dataToPlot %>% filter(question == "mean", grepl('^number', panel))
}
